# Render the two images stored under ./directions, in their original order.
for image_path in (
    './directions/0.jpg',
    './directions/1.jpg',
):
    display(Image(filename=image_path))
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from statsmodels.regression.linear_model import OLS
from statsmodels.tools.tools import add_constant
One way I can think of is to bootstrap-sample only from the observations of subjects aged 25 to 35. On each bootstrap dataset I would fit a linear regression using age and an intercept, and collect the value of the age coefficient. Finally, I would build a histogram of the age coefficients across the bootstrap samples and use it to compute a confidence interval for the age coefficient. If the CI ranges from negative to positive values (i.e., it contains zero), then I would conclude that the flat spot is legitimate.
If the difference in the means between AML and ALL patients did indeed follow a t distribution with 70 degrees of freedom, then, by the hint, we'd expect the largest value to be larger than the other 7127 just by chance (the probability that a random value is the largest would be 1/7128). If we look up the value with that upper-tail probability in the t distribution with 70 degrees of freedom, we get 3.826.
# Import libraries that we'll need...
import pandas as pd
import numpy as np
from sklearn.utils import resample
import matplotlib.pyplot as plt
from scipy import stats
import matplotlib
# Render each listed image in sequence: first the two files from
# ./directions, then the three files from ./sol.
for image_path in (
    './directions/2_1.jpg',
    './directions/2_2.jpg',
    './sol/2_1_a.jpg',
    './sol/2_1_b_1.jpg',
    './sol/2_1_b_2.jpg',
):
    display(Image(filename=image_path))
# Import libraries that we'll need...
import pandas as pd
import numpy as np
from sklearn.utils import resample
from scipy.stats.mstats import winsorize
import matplotlib.pyplot as plt
from scipy import stats
import matplotlib
# Render the two images ./sol/2_3.jpg and ./sol/2_4.jpg, in that order.
for image_path in (
    './sol/2_3.jpg',
    './sol/2_4.jpg',
):
    display(Image(filename=image_path))
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt
# Render each listed image in sequence: the ./directions file first,
# followed by the five files from ./sol.
for image_path in (
    './directions/3.jpg',
    './sol/3_1.jpg',
    './sol/3_2_1.jpg',
    './sol/3_2_2.jpg',
    './sol/3_3_1.jpg',
    './sol/3_3_2.jpg',
):
    display(Image(filename=image_path))
import pandas as pd
import numpy as np
from scipy.stats import norm
# Render the three listed ./sol images, in their original order.
for image_path in (
    './sol/3_5.jpg',
    './sol/3_6_1.jpg',
    './sol/3_6_2.jpg',
):
    display(Image(filename=image_path))
# Render each listed image in sequence: the ./directions file first,
# followed by the thirteen files from ./sol.
for image_path in (
    './directions/4.jpg',
    './sol/4_1_1.jpg',
    './sol/4_1_2.jpg',
    './sol/4_1_3.jpg',
    './sol/4_2_1.jpg',
    './sol/4_2_2.jpg',
    './sol/4_2_3.jpg',
    './sol/4_3_1.jpg',
    './sol/4_3_2.jpg',
    './sol/4_3_3.jpg',
    './sol/4_3_4.jpg',
    './sol/4_3_5.jpg',
    './sol/4_3_6.jpg',
    './sol/4_4.jpg',
):
    display(Image(filename=image_path))
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import cauchy
from scipy.optimize import minimize
from statsmodels.regression.linear_model import OLS
# Render each listed image in sequence; the order of the paths below
# is the order in which the images are shown.
for image_path in (
    './sol/4_6_1.jpg',
    './sol/4_6_2.jpg',
    './directions/5_1.jpg',
    './directions/5_2.jpg',
    './sol/5_1_1.jpg',
    './sol/5_1_2.jpg',
    './sol/5_1_3.jpg',
):
    display(Image(filename=image_path))